今天我們要來實作ResNet,並比較加入ResNet後,是否正確率有提升。
我們使用Colab來當作我們的實作平台,並使用Keras來完成。
fashion_mnist,為Keras內建的資料集
訓練集為60,000 張28x28 像素灰度圖像,測試集為10,000 張同規格圖像,總共10 類時尚物品標籤。該數據集可以用作MNIST 的直接替代品。類別標籤是:
類別 描述 中文
0 T-shirt/top T恤/上衣
1 Trouser 褲子
2 Pullover 套頭衫
3 Dress 連衣裙
4 Coat 外套
5 Sandal 涼鞋
6 Shirt 襯衫
7 Sneaker 運動鞋
8 Bag 背包
9 Ankle boot 短靴
讀取資料集後,把數值都scale到0~1之間。
並設定batch_size=150、epoch=25
import os

import keras
import numpy as np
from keras.datasets import fashion_mnist
from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling1D,\
    MaxPooling2D, UpSampling1D, UpSampling2D, Dropout, Lambda, Convolution2D,\
    Reshape, Activation, Flatten, add, concatenate, Subtract, BatchNormalization
from keras.models import Model, Sequential
# Experiment-wide settings: number of target classes, training epochs and
# mini-batch size.
nb_classes, nb_epoch, batch_size = 10, 25, 150

# Load the Fashion-MNIST train/test split bundled with Keras.
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Convert pixels to float32 and divide by 255 so values lie in 0~1, then
# append a trailing channel axis of size 1: (N, H, W) -> (N, H, W, 1),
# which is the layout the Conv2D models below take as input.
x_train = np.expand_dims(x_train.astype('float32') / 255.0, axis=-1)
x_test = np.expand_dims(x_test.astype('float32') / 255.0, axis=-1)
# 12-layer plain CNN (no shortcut connections).
#
# Layout: one stem Conv2D, then four stages of repeated Conv2D+BatchNorm
# (2, 3, 3, 3 convolutions with 32, 64, 128, 256 filters), for a total of
# 1 + 2 + 3 + 3 + 3 = 12 convolutions. Each stage ends with a 2x2 max-pool,
# followed by Dropout, Flatten and a softmax classifier.
tag = 'ResNet0910_fashion_mnist_12layer_{}'.format(0)
# NOTE(review): WEIGHT_DIR is not defined in the visible part of this file;
# it must be defined before this section runs. h5_weight_path is not used
# below -- presumably later training/checkpoint code consumes it.
h5_weight_path = os.path.join(WEIGHT_DIR, './' + tag + '.h5')

input_shape = (28, 28, 1)
# NOTE: `input` shadows the Python builtin; the name is kept unchanged so
# any later code referring to it keeps working.
input = Input(input_shape, name='input')

# Stem convolution.
layer = Conv2D(32, kernel_size=(2, 2), activation='relu', padding='same')(input)
layer = BatchNormalization()(layer)

# (filters, number of Conv2D+BatchNorm pairs) for each stage. The pooling
# sits outside the inner loop: it runs once per stage, not once per conv.
for filters, repeats in ((32, 2), (64, 3), (128, 3), (256, 3)):
    for i in range(repeats):
        layer = Conv2D(filters, kernel_size=(2, 2), activation='relu',
                       padding='same')(layer)
        layer = BatchNormalization()(layer)
    layer = MaxPooling2D(pool_size=(2, 2))(layer)

# Classifier head.
layer = Dropout(0.5)(layer)
layer = Flatten(name='flatten')(layer)
output = Dense(nb_classes, name="Dense_10nb", activation='softmax')(layer)

model = Model(inputs=[input], outputs=[output])
# Sparse categorical cross-entropy: labels are passed as integer class ids.
# NOTE(review): `lr`/`decay` are the legacy Adam argument names; recent
# Keras releases expect `learning_rate` -- confirm against the installed
# Keras version before changing.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.0001, decay=1e-6),
              metrics=['accuracy'])
model.summary()
# 20-layer plain CNN (no shortcut connections).
#
# Layout: one stem Conv2D, then four stages of repeated Conv2D+BatchNorm
# (4, 5, 5, 5 convolutions with 32, 64, 128, 256 filters), for a total of
# 1 + 4 + 5 + 5 + 5 = 20 convolutions. Each stage ends with a 2x2 max-pool,
# followed by Dropout, Flatten and a softmax classifier.
tag = 'ResNet0910_fashion_mnist_20layer_{}'.format(0)
# NOTE(review): WEIGHT_DIR is not defined in the visible part of this file;
# it must be defined before this section runs. h5_weight_path is not used
# below -- presumably later training/checkpoint code consumes it.
h5_weight_path = os.path.join(WEIGHT_DIR, './' + tag + '.h5')

input_shape = (28, 28, 1)
# NOTE: `input` shadows the Python builtin; the name is kept unchanged so
# any later code referring to it keeps working.
input = Input(input_shape, name='input')

# Stem convolution.
layer = Conv2D(32, kernel_size=(2, 2), activation='relu', padding='same')(input)
layer = BatchNormalization()(layer)

# (filters, number of Conv2D+BatchNorm pairs) for each stage. The pooling
# sits outside the inner loop: it runs once per stage, not once per conv.
for filters, repeats in ((32, 4), (64, 5), (128, 5), (256, 5)):
    for i in range(repeats):
        layer = Conv2D(filters, kernel_size=(2, 2), activation='relu',
                       padding='same')(layer)
        layer = BatchNormalization()(layer)
    layer = MaxPooling2D(pool_size=(2, 2))(layer)

# Classifier head.
layer = Dropout(0.5)(layer)
layer = Flatten(name='flatten')(layer)
output = Dense(nb_classes, name="Dense_10nb", activation='softmax')(layer)

model = Model(inputs=[input], outputs=[output])
# Sparse categorical cross-entropy: labels are passed as integer class ids.
# NOTE(review): `lr`/`decay` are the legacy Adam argument names; recent
# Keras releases expect `learning_rate` -- confirm against the installed
# Keras version before changing.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.0001, decay=1e-6),
              metrics=['accuracy'])
model.summary()
# 12-layer CNN with shortcut (residual) connections.
#
# Same convolution stack as the 12-layer plain model (stem + 2, 3, 3, 3
# convolutions with 32, 64, 128, 256 filters), but each stage's output is
# summed with a shortcut tensor before pooling. The first stage reuses the
# stem output as its shortcut; later stages project the stage input with a
# 1x1 convolution so the channel count matches the stage's filters.
#
# NOTE(review): the source this was recovered from had lost its
# indentation, so whether add() ran once per stage (as written here) or
# once per convolution inside the loop could not be determined -- confirm
# against the original notebook / saved weights.
tag = 'ResNet0910_fashion_mnist_12layerResNet_{}'.format(0)
# NOTE(review): WEIGHT_DIR is not defined in the visible part of this file;
# it must be defined before this section runs. h5_weight_path is not used
# below -- presumably later training/checkpoint code consumes it.
h5_weight_path = os.path.join(WEIGHT_DIR, './' + tag + '.h5')

input_shape = (28, 28, 1)
# NOTE: `input` shadows the Python builtin; the name is kept unchanged so
# any later code referring to it keeps working.
input = Input(input_shape, name='input')

# Stem convolution; its output is also the shortcut for the first stage.
layer = Conv2D(32, kernel_size=(2, 2), activation='relu', padding='same')(input)
layer = BatchNormalization()(layer)

# Stage 1: 32 filters, identity shortcut (channel counts already match).
Resnet = layer
for i in range(2):
    layer = Conv2D(32, kernel_size=(2, 2), activation='relu',
                   padding='same')(layer)
    layer = BatchNormalization()(layer)
layer = add([layer, Resnet])
layer = MaxPooling2D(pool_size=(2, 2))(layer)

# Stages 2-4: (filters, number of Conv2D+BatchNorm pairs). The 1x1
# projection is created before the stage's convolutions, matching the
# original layer creation order.
for filters, repeats in ((64, 3), (128, 3), (256, 3)):
    Resnet = Conv2D(filters, kernel_size=(1, 1), activation='relu',
                    padding='same')(layer)
    for i in range(repeats):
        layer = Conv2D(filters, kernel_size=(2, 2), activation='relu',
                       padding='same')(layer)
        layer = BatchNormalization()(layer)
    layer = add([layer, Resnet])
    layer = MaxPooling2D(pool_size=(2, 2))(layer)

# Classifier head.
layer = Dropout(0.5)(layer)
layer = Flatten(name='flatten')(layer)
output = Dense(nb_classes, name="Dense_10nb", activation='softmax')(layer)

model = Model(inputs=[input], outputs=[output])
# Sparse categorical cross-entropy: labels are passed as integer class ids.
# NOTE(review): `lr`/`decay` are the legacy Adam argument names; recent
# Keras releases expect `learning_rate` -- confirm against the installed
# Keras version before changing.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.0001, decay=1e-6),
              metrics=['accuracy'])
model.summary()
# 20-layer CNN with shortcut (residual) connections.
#
# Same convolution stack as the 20-layer plain model (stem + 4, 5, 5, 5
# convolutions with 32, 64, 128, 256 filters), but each stage's output is
# summed with a shortcut tensor before pooling. The first stage reuses the
# stem output as its shortcut; later stages project the stage input with a
# 1x1 convolution so the channel count matches the stage's filters.
#
# NOTE(review): the source this was recovered from had lost its
# indentation, so whether add() ran once per stage (as written here) or
# once per convolution inside the loop could not be determined -- confirm
# against the original notebook / saved weights.
tag = 'ResNet0910_fashion_mnist_20layer_ResNet_{}'.format(0)
# NOTE(review): WEIGHT_DIR is not defined in the visible part of this file;
# it must be defined before this section runs. h5_weight_path is not used
# below -- presumably later training/checkpoint code consumes it.
h5_weight_path = os.path.join(WEIGHT_DIR, './' + tag + '.h5')

input_shape = (28, 28, 1)
# NOTE: `input` shadows the Python builtin; the name is kept unchanged so
# any later code referring to it keeps working.
input = Input(input_shape, name='input')

# Stem convolution; its output is also the shortcut for the first stage.
layer = Conv2D(32, kernel_size=(2, 2), activation='relu', padding='same')(input)
layer = BatchNormalization()(layer)

# Stage 1: 32 filters, identity shortcut (channel counts already match).
Resnet = layer
for i in range(4):
    layer = Conv2D(32, kernel_size=(2, 2), activation='relu',
                   padding='same')(layer)
    layer = BatchNormalization()(layer)
layer = add([layer, Resnet])
layer = MaxPooling2D(pool_size=(2, 2))(layer)

# Stages 2-4: (filters, number of Conv2D+BatchNorm pairs). The 1x1
# projection is created before the stage's convolutions, matching the
# original layer creation order.
for filters, repeats in ((64, 5), (128, 5), (256, 5)):
    Resnet = Conv2D(filters, kernel_size=(1, 1), activation='relu',
                    padding='same')(layer)
    for i in range(repeats):
        layer = Conv2D(filters, kernel_size=(2, 2), activation='relu',
                       padding='same')(layer)
        layer = BatchNormalization()(layer)
    layer = add([layer, Resnet])
    layer = MaxPooling2D(pool_size=(2, 2))(layer)

# Classifier head.
layer = Dropout(0.5)(layer)
layer = Flatten(name='flatten')(layer)
output = Dense(nb_classes, name="Dense_10nb", activation='softmax')(layer)

model = Model(inputs=[input], outputs=[output])
# Sparse categorical cross-entropy: labels are passed as integer class ids.
# NOTE(review): `lr`/`decay` are the legacy Adam argument names; recent
# Keras releases expect `learning_rate` -- confirm against the installed
# Keras version before changing.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.0001, decay=1e-6),
              metrics=['accuracy'])
model.summary()
層數 | 12層 | 20層 | 32層 | 50層 |
---|---|---|---|---|
正確率 | 88.09% | 87.20% | 84.61% | 23.74% |
在我們這種設計下,可以發現越多層數,我們的正確率越低。
那麼我們加入ResNet試試看正確率是否能提高。
層數 | 12層 | 20層 | 32層 | 50層 |
---|---|---|---|---|
正確率 | 89.43% | 88.84% | 89.43% | 86.09% |
可以發現使用ResNet後,正確率都有提升了!
在沒加入ResNet時,12層正確率最好,而50層正確率最低,因為太過深層了!
使用ResNet後,每一種層數的正確率都有提升,尤其是50層的,從完全無法看,變得人模人樣了(誤)。
參考資料:Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun, "Deep Residual Learning for Image Recognition", CVPR 2016 (arXiv:1512.03385)